Homework 2

Tasks 1 & 2

In this part, the matches and odds tables are modified. The IsOver and Outcome columns are added to the matches table to record the result of each match. Also, some odd types are renamed so that these odds appear as separate features in the resulting matrix.

# Label each match with its over/under 2.5 result and its home/tie/away result.
# Both labels are derived directly from the scores, so a match whose score is
# missing gets NA instead of silently defaulting to "under" / "home win".
matches[, IsOver := as.numeric(TotalScore > 2)] # 1 when more than 2 goals, else 0
matches[, ScoreDifference := HomeScore - AwayScore]
matches[, Outcome := as.numeric(sign(ScoreDifference))] # 1 home, 0 tie, -1 away

# Give the 2.5-handicap and the "ah"/"ha" odd types their own names so that each
# of them becomes a separate feature. `:=` updates `odds` by reference, so the
# result does not have to be assigned back.
odds[totalhandicap == 2.5 & oddtype == "over", oddtype := "over2.5"]
odds[totalhandicap == 2.5 & oddtype == "under", oddtype := "under2.5"]
# "ah" and "ha" bets both use the labels "1" and "2"; prefix them with the bet type.
odds[betType %in% c("ah", "ha") & oddtype %in% c("1", "2"),
     oddtype := paste0(betType, oddtype)]

# Final odd = last record per (match, odd type, bookmaker) once sorted by date.
odds <- odds[order(matchId, oddtype, bookmaker, date)]
odds_final <- odds[, list(final_odd = odd[.N]),
                   by = list(matchId, oddtype, bookmaker)]

For this part of the assignment, the odds of 5 different bookmakers are observed. First, the odds of a bookmaker are turned into features for every match. Then PCA is applied to the matrix and the results are plotted. Different colors on the plots refer to different match outcomes. For the 1st PCA plot, the points are color coded according to whether the match ended with a total score above 2.5. For the 2nd PCA plot, the points are color coded according to the match outcome: Home, Tie or Away. MDS is also applied by computing the distance matrix for every bookmaker. The distances are calculated with 2 different methods: Manhattan and Euclidean. Below you can see the plots of the 5 bookmakers.

Betsson

# Component importances and loadings; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(pca_x, loadings = TRUE)
## Importance of components:
##                          Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     2.379861 1.8711034 1.0978310 0.92332549 0.76859208
## Proportion of Variance 0.471978 0.2917523 0.1004361 0.07104416 0.04922782
## Cumulative Proportion  0.471978 0.7637303 0.8641664 0.93521056 0.98443838
##                             Comp.6      Comp.7      Comp.8       Comp.9
## Standard deviation     0.280386710 0.257233710 0.153415466 0.0860959373
## Proportion of Variance 0.006551392 0.005514098 0.001961359 0.0006177092
## Cumulative Proportion  0.990989772 0.996503871 0.998465229 0.9990829386
##                             Comp.10      Comp.11      Comp.12
## Standard deviation     0.0786803530 0.0575688033 0.0387294743
## Proportion of Variance 0.0005158832 0.0002761806 0.0001249977
## Cumulative Proportion  0.9995988217 0.9998750023 1.0000000000
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## ha1       0.228  0.435                0.213         0.223  0.221  0.429
## ha2      -0.408                       0.239         0.365  0.293 -0.577
## 12        0.303 -0.338        -0.128 -0.137 -0.359  0.737 -0.276       
## 1X        0.251  0.422                0.150         0.173        -0.537
## X2       -0.412                       0.173         0.199         0.262
## odd1      0.248  0.418                0.208         0.186  0.102  0.139
## odd2     -0.407                       0.227         0.278  0.175  0.321
## oddX     -0.322  0.311         0.137  0.230 -0.311 -0.103 -0.740       
## over                    0.724  0.569 -0.377                            
## under                  -0.673  0.702 -0.224                            
## over2.5   0.236 -0.338         0.298  0.540 -0.557 -0.267  0.245       
## under2.5 -0.264  0.348        -0.168 -0.441 -0.669         0.367       
##          Comp.10 Comp.11 Comp.12
## ha1       0.230   0.430   0.427 
## ha2       0.261  -0.140   0.349 
## 12                              
## 1X       -0.288   0.399  -0.411 
## X2        0.487   0.212  -0.633 
## odd1             -0.766  -0.237 
## odd2     -0.744                 
## oddX                      0.255 
## over                            
## under                           
## over2.5                         
## under2.5
# Scree plot of the variance captured by each principal component.
par(mfrow = c(1, 1))
plot(pca_x, main = "Betsson PCA")

# First two principal-component scores; colour 2 marks IsOver == 1 and
# colour 1 marks IsOver == 0.
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "Betsson O/U PCA", col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# Same projection coloured by Outcome + 2 (3 = home, 2 = tie, 1 = away).
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "Betsson Outcomes PCA", col = x$Outcome + 2, pch = ".", cex = 7
)
legend("topleft", legend = c("Home", "Tie", "Away"),
       col = c(3, 2, 1), pch = "-", cex = 1)

# MDS coordinates computed from the Manhattan distances.
plot(
  mds_manh[, 1], mds_manh[, 2],
  main = "Betsson O/U Manhattan MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topright", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Euclidean distances.
plot(
  mds_eucl[, 1], mds_eucl[, 2],
  main = "Betsson O/U Euclidean MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topright", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

Paddy Power

# Component importances and loadings; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(pca_x, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     2.4527325 1.7921064 1.3192407 1.11081027 0.91237449
## Proportion of Variance 0.4297069 0.2294032 0.1243140 0.08813568 0.05945909
## Cumulative Proportion  0.4297069 0.6591101 0.7834241 0.87155981 0.93101889
##                            Comp.6     Comp.7      Comp.8      Comp.9
## Standard deviation     0.70464292 0.48110695 0.367953679 0.212940386
## Proportion of Variance 0.03546583 0.01653314 0.009670708 0.003238829
## Cumulative Proportion  0.96648472 0.98301786 0.992688567 0.995927396
##                            Comp.10     Comp.11      Comp.12      Comp.13
## Standard deviation     0.162804498 0.119002492 0.0824562616 0.0756780979
## Proportion of Variance 0.001893236 0.001011542 0.0004856454 0.0004090839
## Cumulative Proportion  0.997820632 0.998832175 0.9993178199 0.9997269038
##                             Comp.14
## Standard deviation     0.0618332212
## Proportion of Variance 0.0002730962
## Cumulative Proportion  1.0000000000
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## ha1       0.203  0.473                       0.186                0.111
## ha2      -0.397                              0.225         0.129  0.132
## ah1                    -0.699                      -0.702              
## ah2                     0.702                      -0.703              
## 12        0.323 -0.296                                    -0.725  0.471
## 1X        0.214  0.466                       0.145                     
## X2       -0.399                              0.203        -0.142       
## odd1      0.223  0.455                       0.182                0.149
## odd2     -0.393                              0.248                0.397
## oddX     -0.346  0.244         0.119         0.256        -0.337       
## over                           0.728  0.556 -0.381                     
## under                         -0.635  0.763 -0.107                     
## over2.5   0.264 -0.307         0.150  0.242  0.601        -0.209 -0.557
## under2.5 -0.308  0.299               -0.117 -0.406        -0.505 -0.494
##          Comp.10 Comp.11 Comp.12 Comp.13 Comp.14
## ha1       0.134           0.185   0.681   0.401 
## ha2       0.298  -0.657   0.394  -0.189   0.192 
## ah1                                             
## ah2                                             
## 12               -0.185                         
## 1X                        0.469  -0.203  -0.657 
## X2        0.107  -0.190  -0.419   0.492  -0.544 
## odd1      0.205  -0.202  -0.633  -0.414   0.142 
## odd2      0.331   0.668          -0.174   0.124 
## oddX     -0.757                  -0.108   0.173 
## over                                            
## under                                           
## over2.5   0.169                                 
## under2.5  0.354
# Scree plot of the variance captured by each principal component.
par(mfrow = c(1, 1))
plot(pca_x, main = "Paddy Power PCA")

# First two principal-component scores; colour 2 marks IsOver == 1 and
# colour 1 marks IsOver == 0.
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "Paddy Power O/U PCA", col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# Same projection coloured by Outcome + 2 (3 = home, 2 = tie, 1 = away).
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "Paddy Power Outcomes PCA", col = x$Outcome + 2, pch = ".", cex = 7
)
legend("topleft", legend = c("Home", "Tie", "Away"),
       col = c(3, 2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Manhattan distances.
plot(
  mds_manh[, 1], mds_manh[, 2],
  main = "Paddy Power  O/U Manhattan MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topright", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.7)

# MDS coordinates computed from the Euclidean distances.
plot(
  mds_eucl[, 1], mds_eucl[, 2],
  main = "Paddy Power O/U Euclidean MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

10Bet

# Component importances and loadings; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(pca_x, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3    Comp.4     Comp.5
## Standard deviation     2.1711414 1.6078286 1.2138753 1.1496554 0.82719620
## Proportion of Variance 0.3928212 0.2154261 0.1227911 0.1101423 0.05702113
## Cumulative Proportion  0.3928212 0.6082473 0.7310384 0.8411807 0.89820182
##                            Comp.6     Comp.7     Comp.8     Comp.9
## Standard deviation     0.72413061 0.65725218 0.41917875 0.21836428
## Proportion of Variance 0.04369709 0.03599837 0.01464257 0.00397358
## Cumulative Proportion  0.94189891 0.97789728 0.99253985 0.99651343
##                            Comp.10     Comp.11      Comp.12
## Standard deviation     0.133147062 0.116370757 0.1028033192
## Proportion of Variance 0.001477345 0.001128513 0.0008807102
## Cumulative Proportion  0.997990777 0.999119290 1.0000000000
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## ah1              0.181  0.685                0.135  0.689              
## ah2      -0.105 -0.191 -0.632        -0.301  0.118  0.667              
## 12        0.385 -0.235                      -0.186         0.835 -0.221
## 1X        0.167  0.554 -0.145        -0.114  0.224         0.121       
## X2       -0.420 -0.194  0.123        -0.104  0.207         0.122       
## odd1      0.167  0.548 -0.155        -0.107  0.275               -0.134
## odd2     -0.417 -0.189  0.105        -0.104  0.283 -0.101  0.151 -0.284
## oddX     -0.415  0.185               -0.139  0.324 -0.128  0.296       
## over                           0.725 -0.611 -0.242        -0.161       
## under                   0.204 -0.678 -0.650 -0.249                     
## over2.5   0.347 -0.287               -0.175  0.545 -0.144         0.654
## under2.5 -0.376  0.275                0.107 -0.402         0.350  0.642
##          Comp.10 Comp.11 Comp.12
## ah1                             
## ah2                             
## 12                              
## 1X                       -0.745 
## X2       -0.817          -0.124 
## odd1     -0.196   0.296   0.636 
## odd2      0.478   0.585         
## oddX      0.248  -0.694   0.142 
## over                            
## under                           
## over2.5                         
## under2.5          0.263
# Scree plot of the variance captured by each principal component.
par(mfrow = c(1, 1))
plot(pca_x, main = "10Bet PCA")

# First two principal-component scores; colour 2 marks IsOver == 1 and
# colour 1 marks IsOver == 0.
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "10Bet O/U PCA", col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# Same projection coloured by Outcome + 2 (3 = home, 2 = tie, 1 = away).
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "10Bet Outcomes PCA", col = x$Outcome + 2, pch = ".", cex = 7
)
legend("topright", legend = c("Home", "Tie", "Away"),
       col = c(3, 2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Manhattan distances.
plot(
  mds_manh[, 1], mds_manh[, 2],
  main = "10Bet O/U Manhattan MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topright", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Euclidean distances.
plot(
  mds_eucl[, 1], mds_eucl[, 2],
  main = "10Bet O/U Euclidean MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topright", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

BetVictor

# Component importances and loadings; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(pca_x, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4     Comp.5
## Standard deviation     2.4083193 1.8175315 1.2313932 1.09126603 0.87939629
## Proportion of Variance 0.4142859 0.2359586 0.1083092 0.08506154 0.05523842
## Cumulative Proportion  0.4142859 0.6502445 0.7585537 0.84361525 0.89885367
##                            Comp.6    Comp.7     Comp.8      Comp.9
## Standard deviation     0.79635584 0.6921044 0.41309690 0.221412790
## Proportion of Variance 0.04529876 0.0342149 0.01218922 0.003501687
## Cumulative Proportion  0.94415243 0.9783673 0.99055654 0.994058227
##                            Comp.10     Comp.11     Comp.12      Comp.13
## Standard deviation     0.167438153 0.143916967 0.121140551 0.1145068728
## Proportion of Variance 0.002002538 0.001479435 0.001048217 0.0009365589
## Cumulative Proportion  0.996060765 0.997540200 0.998588417 0.9995249757
##                             Comp.14
## Standard deviation     0.0815496142
## Proportion of Variance 0.0004750243
## Cumulative Proportion  1.0000000000
## 
## Loadings:
##          Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## ha1       0.184  0.480                       0.203  0.103              
## ha2      -0.397                              0.250  0.130  0.118  0.408
## ah1                    -0.663 -0.201  0.123  0.368 -0.598              
## ah2                     0.666  0.212         0.270 -0.642  0.130       
## 12        0.328 -0.283                                    -0.721  0.517
## 1X        0.203  0.469                       0.150                0.123
## X2       -0.401                              0.222        -0.142       
## odd1      0.207  0.463                       0.196  0.103              
## odd2     -0.397                              0.259  0.129         0.184
## oddX     -0.355  0.239                       0.259        -0.251 -0.133
## over                    0.273 -0.647  0.694 -0.140                     
## under                  -0.145  0.701  0.691                            
## over2.5   0.269 -0.304                       0.570  0.284 -0.149 -0.578
## under2.5 -0.313  0.290                      -0.288 -0.256 -0.574 -0.382
##          Comp.10 Comp.11 Comp.12 Comp.13 Comp.14
## ha1                               0.226   0.783 
## ha2       0.666          -0.167  -0.314         
## ah1                                             
## ah2                                             
## 12                                              
## 1X                       -0.550   0.364  -0.494 
## X2               -0.635   0.283   0.510         
## odd1                      0.671  -0.327  -0.349 
## odd2     -0.357   0.684   0.173   0.296         
## oddX     -0.490  -0.222  -0.317  -0.508         
## over                                            
## under                                           
## over2.5   0.219   0.124                         
## under2.5  0.353   0.240
# Scree plot of the variance captured by each principal component.
par(mfrow = c(1, 1))
plot(pca_x, main = "BetVictor PCA")

# First two principal-component scores; colour 2 marks IsOver == 1 and
# colour 1 marks IsOver == 0.
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "BetVictor O/U PCA", col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# Same projection coloured by Outcome + 2 (3 = home, 2 = tie, 1 = away).
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "BetVictor Outcomes PCA", col = x$Outcome + 2, pch = ".", cex = 7
)
legend("topleft", legend = c("Home", "Tie", "Away"),
       col = c(3, 2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Manhattan distances.
plot(
  mds_manh[, 1], mds_manh[, 2],
  main = "BetVictor O/U Manhattan MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("bottom", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Euclidean distances.
plot(
  mds_eucl[, 1], mds_eucl[, 2],
  main = "BetVictor O/U Euclidean MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("bottomleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

William Hill

# Component importances and loadings; TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
summary(pca_x, loadings = TRUE)
## Importance of components:
##                           Comp.1    Comp.2    Comp.3     Comp.4
## Standard deviation     2.2990304 1.5966991 1.3460465 0.45662863
## Proportion of Variance 0.5285541 0.2549448 0.1811841 0.02085097
## Cumulative Proportion  0.5285541 0.7834989 0.9646830 0.98553395
##                             Comp.5     Comp.6      Comp.7      Comp.8
## Standard deviation     0.226661341 0.20857828 0.147158541 0.107671999
## Proportion of Variance 0.005137536 0.00435049 0.002165564 0.001159326
## Cumulative Proportion  0.990671491 0.99502198 0.997187545 0.998346871
##                              Comp.9     Comp.10
## Standard deviation     0.0993001241 0.081674842
## Proportion of Variance 0.0009860515 0.000667078
## Cumulative Proportion  0.9993329220 1.000000000
## 
## Loadings:
##      Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
## ha1   0.309  0.438                0.171                       0.155
## ha2  -0.422  0.125         0.120  0.274  0.104  0.267  0.160 -0.767
## 12    0.222 -0.422  0.284  0.821                                   
## 1X    0.333  0.398                0.172  0.117         0.679       
## X2   -0.423                0.111  0.204  0.206  0.608         0.571
## YES  -0.149  0.179  0.655         0.270 -0.657                     
## NO    0.139 -0.168 -0.664  0.122  0.388 -0.586                     
## odd1  0.323  0.413         0.113  0.213  0.108  0.153 -0.700 -0.155
## odd2 -0.420  0.115         0.204  0.402  0.199 -0.720 -0.124  0.174
## oddX -0.257  0.452 -0.184  0.459 -0.624 -0.302                     
##      Comp.10
## ha1   0.798 
## ha2   0.134 
## 12          
## 1X   -0.462 
## X2   -0.109 
## YES         
## NO          
## odd1 -0.342 
## odd2        
## oddX
# Scree plot of the variance captured by each principal component.
par(mfrow = c(1, 1))
plot(pca_x, main = "William Hill PCA")

# First two principal-component scores; colour 2 marks IsOver == 1 and
# colour 1 marks IsOver == 0.
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "William Hill O/U PCA", col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# Same projection coloured by Outcome + 2 (3 = home, 2 = tie, 1 = away).
plot(
  pca_x$scores[, 1], pca_x$scores[, 2],
  main = "William Hill Outcomes PCA", col = x$Outcome + 2, pch = ".", cex = 7
)
legend("topleft", legend = c("Home", "Tie", "Away"),
       col = c(3, 2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Manhattan distances.
plot(
  mds_manh[, 1], mds_manh[, 2],
  main = "William Hill O/U Manhattan MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("bottomleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

# MDS coordinates computed from the Euclidean distances.
plot(
  mds_eucl[, 1], mds_eucl[, 2],
  main = "William Hill O/U Euclidean MDS", xlab = "", ylab = "",
  col = x$IsOver + 1, pch = ".", cex = 7
)
legend("topleft", legend = c("Over 2.5", "Under 2.5"),
       col = c(2, 1), pch = "-", cex = 0.9)

Task 3

In this task I chose an image of a milk carton. First the image is displayed using the rasterImage() function, and then it is split into its RGB channels, which are displayed using the image() function. Then random uniform noise is added to the image. After adding the noise, some pixel values crossed the threshold value of 1, so the image was rescaled using the renorm() function.

This is the image:

# Open an empty 250 x 250 canvas, then draw the image across all of it.
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Milk Carton")
rasterImage(img_raster, xleft = 0, ybottom = 0, xright = 250, ytop = 250)

RGB channels of the image:

# One panel per channel, each drawn with a 256-step ramp of its own colour.
par(mfrow = c(1, 3))
image(r_img, main = "Red", col = rgb((0:255) / 255, 0, 0))
image(g_img, main = "Green", col = rgb(0, (0:255) / 255, 0))
image(b_img, main = "Blue", col = rgb(0, 0, (0:255) / 255))

Image with added noise:

# Open an empty 250 x 250 canvas, then draw the noisy image across all of it.
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Noisy Milk Carton")
rasterImage(img_noise_raster, xleft = 0, ybottom = 0, xright = 250, ytop = 250)

RGB channels of the noisy image:

# One panel per noisy channel, each drawn with a 256-step ramp of its own colour.
par(mfrow = c(1, 3))
image(r_img_noise, main = "Red with Noise", col = rgb((0:255) / 255, 0, 0))
image(g_img_noise, main = "Green with Noise", col = rgb(0, (0:255) / 255, 0))
image(b_img_noise, main = "Blue with Noise", col = rgb(0, 0, (0:255) / 255))

Task 4

In this part, the image is transformed into a grayscale image using the grayscale() function. The grayscale image is then resized to a size of 202x202. This is because I will use 3x3 patches and I wanted nice, round numbers. The patches are computed and kept as rows of a matrix using a while loop. PCA is applied to the resulting matrix and then the image is reconstructed using the 1st, 2nd and 3rd components of the PCA. Finally, the components of PCA are displayed as images.

The image is turned into grayscale image and is displayed as below:

# Open an empty 250 x 250 canvas, then draw the grayscale image across all of it.
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Grayscale Image")
rasterImage(gray_image, xleft = 0, ybottom = 0, xright = 250, ytop = 250)

Here I compute 200*200=40000 patches of size 3x3. To do this I use 2 while loops and keep every patch as a vector. I assign these vectors as rows of a matrix B which has 9 columns. Then I apply PCA to the matrix B.

# PCA on the patch matrix B using the correlation matrix (cor = TRUE).
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
pca_B <- princomp(B, cor = TRUE)
summary(pca_B, loadings = TRUE)
## Importance of components:
##                           Comp.1     Comp.2     Comp.3     Comp.4
## Standard deviation     2.4188533 0.89306141 0.84502615 0.60548497
## Proportion of Variance 0.6500946 0.08861763 0.07934102 0.04073467
## Cumulative Proportion  0.6500946 0.73871224 0.81805326 0.85878793
##                            Comp.5     Comp.6     Comp.7     Comp.8
## Standard deviation     0.59434647 0.55581551 0.48465213 0.43657009
## Proportion of Variance 0.03924975 0.03432565 0.02609863 0.02117705
## Cumulative Proportion  0.89803768 0.93236333 0.95846197 0.97963902
##                            Comp.9
## Standard deviation     0.42807576
## Proportion of Variance 0.02036098
## Cumulative Proportion  1.00000000
## 
## Loadings:
##       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
##  [1,]  0.320  0.404  0.359  0.353  0.402  0.227  0.433  0.144  0.244
##  [2,]  0.338  0.457         0.197        -0.486 -0.237 -0.483 -0.333
##  [3,]  0.322  0.358 -0.402  0.161 -0.572  0.153 -0.146  0.433  0.147
##  [4,]  0.339         0.457 -0.374         0.333 -0.486  0.236 -0.353
##  [5,]  0.360               -0.587        -0.394                0.603
##  [6,]  0.339        -0.456 -0.370         0.334  0.485 -0.232 -0.363
##  [7,]  0.321 -0.360  0.403  0.163 -0.572  0.151  0.148 -0.433  0.139
##  [8,]  0.338 -0.456         0.199        -0.485  0.231  0.489 -0.328
##  [9,]  0.321 -0.401 -0.358  0.350  0.402  0.228 -0.434 -0.150  0.247

In the PCA applied to the matrix of patches the 1st component covers most of the variance and dominates other components.

Here, the scores 1, 2 and 3 are taken and turned into 200x200 matrices and then displayed as images.

# Three panels side by side, one image (t1, t2, t3) per panel.
par(mfrow = c(1, 3))
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp1")
rasterImage(t1, xleft = 0, ybottom = 0, xright = 250, ytop = 250)
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp2")
rasterImage(t2, xleft = 0, ybottom = 0, xright = 250, ytop = 250)
plot(x = c(0, 250), y = c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp3")
rasterImage(t3, xleft = 0, ybottom = 0, xright = 250, ytop = 250)

Here, the eigenvectors are extracted and recorded as matrices and then displayed as images.

# Three panels side by side, one eigenvector image per panel. Interpolation is
# switched off so that the individual cells stay sharp.
par(mfrow = c(1, 3))
plot(c(0, 1), c(0, 1), type = "n", xlab = "", ylab = "", main = "PCA Component 1")
rasterImage(eig1, 0, 0, 1, 1, angle = 0, interpolate = FALSE)
plot(c(0, 1), c(0, 1), type = "n", xlab = "", ylab = "", main = "PCA Component 2")
rasterImage(eig2, 0, 0, 1, 1, angle = 0, interpolate = FALSE)
plot(c(0, 1), c(0, 1), type = "n", xlab = "", ylab = "", main = "PCA Component 3")
# The third panel previously drew eig1 again under the "Component 3" title.
# NOTE(review): assumes eig3 is built alongside eig1 and eig2 - confirm.
rasterImage(eig3, 0, 0, 1, 1, angle = 0, interpolate = FALSE)

Appendix

# TASKS 1 and 2
# All bookmakers share the same code body; only the columns omitted because of NA values differ between bookmakers.
# Betsson: one row per match with the bookmaker's final odds as columns.
odds_bm <- odds_final[bookmaker == "Betsson"]
bm_wide <- dcast(odds_bm, matchId ~ oddtype, value.var = "final_odd")
merged <- merge(matches, bm_wide, by = "matchId")
merged[, c("type", "home", "away", "score", "HomeScore", "AwayScore",
           "TotalScore", "date") := NULL]

# NA counts per odd type; the trailing numbers are the counts noted by the author.
sum(is.na(merged$ah1)) # 2280
sum(is.na(merged$ha1)) # 265
sum(is.na(merged$`12`)) # 331
sum(is.na(merged$YES)) # 1115
sum(is.na(merged$odd1)) # 0
sum(is.na(merged$over)) # 194
sum(is.na(merged$over2.5)) # 192

# Drop the odd types excluded for this bookmaker, then keep complete rows only.
merged[, c("YES", "NO", "ah1", "ah2") := NULL]
x <- na.omit(merged)

# The same odds columns feed PCA and both distance matrices. Excluding only
# matchId and IsOver would leave ScoreDifference and Outcome in the distances,
# so the match result itself would leak into the MDS embedding.
odds_cols <- c("ha1", "ha2", "12", "1X", "X2", "odd1", "odd2", "oddX",
               "over", "under", "over2.5", "under2.5")
odds_features <- x[, odds_cols, with = FALSE]
pca_x <- princomp(odds_features, cor = TRUE)
distmanh <- dist(odds_features, method = "manhattan")
mds_manh <- cmdscale(distmanh)
disteucl <- dist(odds_features, method = "euclidean")
mds_eucl <- cmdscale(disteucl)

summary(pca_x, loadings = TRUE)
par(mfrow = c(1, 1))
plot(pca_x, main = "Betsson PCA") # same title as in the report section

# Colours: IsOver + 1 gives 2 = over, 1 = under;
# Outcome + 2 gives 3 = home, 2 = tie, 1 = away.
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$IsOver + 1, pch = ".",
     cex = 7, main = "Betsson O/U PCA")
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$Outcome + 2, pch = ".",
     cex = 7, main = "Betsson Outcomes PCA")
legend("topleft", legend = c("Home", "Tie", "Away"), col = c(3, 2, 1),
       pch = "-", cex = 1)
plot(mds_manh[, 1], mds_manh[, 2], main = "Betsson O/U Manhattan MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topright", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(mds_eucl[, 1], mds_eucl[, 2], main = "Betsson O/U Euclidean MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topright", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)

# Paddy Power: one row per match with the bookmaker's final odds as columns.
odds_bm <- odds_final[bookmaker == "Paddy Power"]
bm_wide <- dcast(odds_bm, matchId ~ oddtype, value.var = "final_odd")
merged <- merge(matches, bm_wide, by = "matchId")
merged[, c("type", "home", "away", "score", "HomeScore", "AwayScore",
           "TotalScore", "date") := NULL]

# NA counts per odd type; the trailing numbers are the counts noted by the author.
sum(is.na(merged$ah1)) # 222
sum(is.na(merged$ha1)) # 213
sum(is.na(merged$`12`)) # 192
sum(is.na(merged$YES)) # 993
sum(is.na(merged$odd1)) # 0
sum(is.na(merged$over)) # 212
sum(is.na(merged$over2.5)) # 212

# Drop the odd types excluded for this bookmaker, then keep complete rows only.
merged[, c("YES", "NO") := NULL]
x <- na.omit(merged)

# The same odds columns feed PCA and both distance matrices. Excluding only
# matchId and IsOver would leave ScoreDifference and Outcome in the distances,
# so the match result itself would leak into the MDS embedding.
odds_cols <- c("ha1", "ha2", "ah1", "ah2", "12", "1X", "X2", "odd1", "odd2",
               "oddX", "over", "under", "over2.5", "under2.5")
odds_features <- x[, odds_cols, with = FALSE]
pca_x <- princomp(odds_features, cor = TRUE)
distmanh <- dist(odds_features, method = "manhattan")
mds_manh <- cmdscale(distmanh)
disteucl <- dist(odds_features, method = "euclidean")
mds_eucl <- cmdscale(disteucl)

summary(pca_x, loadings = TRUE)
par(mfrow = c(1, 1))
plot(pca_x, main = "Paddy Power PCA") # same title as in the report section

# Colours: IsOver + 1 gives 2 = over, 1 = under;
# Outcome + 2 gives 3 = home, 2 = tie, 1 = away.
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$IsOver + 1, pch = ".",
     cex = 7, main = "Paddy Power O/U PCA")
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$Outcome + 2, pch = ".",
     cex = 7, main = "Paddy Power Outcomes PCA")
legend("topleft", legend = c("Home", "Tie", "Away"), col = c(3, 2, 1),
       pch = "-", cex = 0.9)
plot(mds_manh[, 1], mds_manh[, 2], main = "Paddy Power  O/U Manhattan MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topright", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.7)
plot(mds_eucl[, 1], mds_eucl[, 2], main = "Paddy Power O/U Euclidean MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)

# 10Bet: one row per match with the bookmaker's final odds as columns.
odds_bm <- odds_final[bookmaker == "10Bet"]
bm_wide <- dcast(odds_bm, matchId ~ oddtype, value.var = "final_odd")
merged <- merge(matches, bm_wide, by = "matchId")
merged[, c("type", "home", "away", "score", "HomeScore", "AwayScore",
           "TotalScore", "date") := NULL]

# NA counts per odd type; the trailing numbers are the counts noted by the author.
sum(is.na(merged$ah1)) # 192
sum(is.na(merged$ha1)) # 2366
sum(is.na(merged$`12`)) # 267
sum(is.na(merged$YES)) # 997
sum(is.na(merged$odd1)) # 0
sum(is.na(merged$over)) # 192
sum(is.na(merged$over2.5)) # 256

# Drop the odd types excluded for this bookmaker, then keep complete rows only.
merged[, c("ha1", "ha2", "YES", "NO") := NULL]
x <- na.omit(merged)

# The same odds columns feed PCA and both distance matrices. Excluding only
# matchId and IsOver would leave ScoreDifference and Outcome in the distances,
# so the match result itself would leak into the MDS embedding.
odds_cols <- c("ah1", "ah2", "12", "1X", "X2", "odd1", "odd2", "oddX",
               "over", "under", "over2.5", "under2.5")
odds_features <- x[, odds_cols, with = FALSE]
pca_x <- princomp(odds_features, cor = TRUE)
distmanh <- dist(odds_features, method = "manhattan")
mds_manh <- cmdscale(distmanh)
disteucl <- dist(odds_features, method = "euclidean")
mds_eucl <- cmdscale(disteucl)

summary(pca_x, loadings = TRUE)
par(mfrow = c(1, 1))
plot(pca_x, main = "10Bet PCA") # same title as in the report section

# Colours: IsOver + 1 gives 2 = over, 1 = under;
# Outcome + 2 gives 3 = home, 2 = tie, 1 = away.
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$IsOver + 1, pch = ".",
     cex = 7, main = "10Bet O/U PCA")
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$Outcome + 2, pch = ".",
     cex = 7, main = "10Bet Outcomes PCA")
legend("topright", legend = c("Home", "Tie", "Away"), col = c(3, 2, 1),
       pch = "-", cex = 0.9)
plot(mds_manh[, 1], mds_manh[, 2], main = "10Bet O/U Manhattan MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topright", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(mds_eucl[, 1], mds_eucl[, 2], main = "10Bet O/U Euclidean MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topright", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)

# BetVictor: one row per match with the bookmaker's final odds as columns.
odds_bm <- odds_final[bookmaker == "BetVictor"]
bm_wide <- dcast(odds_bm, matchId ~ oddtype, value.var = "final_odd")
merged <- merge(matches, bm_wide, by = "matchId")
merged[, c("type", "home", "away", "score", "HomeScore", "AwayScore",
           "TotalScore", "date") := NULL]

# NA counts per odd type; the trailing numbers are the counts noted by the author.
sum(is.na(merged$ah1)) # 192
sum(is.na(merged$ha1)) # 208
sum(is.na(merged$`12`)) # 233
sum(is.na(merged$YES)) # 973
sum(is.na(merged$odd1)) # 0
sum(is.na(merged$over)) # 192
sum(is.na(merged$over2.5)) # 268

# Drop the odd types excluded for this bookmaker, then keep complete rows only.
merged[, c("YES", "NO") := NULL]
x <- na.omit(merged)

# The same odds columns feed PCA and both distance matrices. Excluding only
# matchId and IsOver would leave ScoreDifference and Outcome in the distances,
# so the match result itself would leak into the MDS embedding.
odds_cols <- c("ha1", "ha2", "ah1", "ah2", "12", "1X", "X2", "odd1", "odd2",
               "oddX", "over", "under", "over2.5", "under2.5")
odds_features <- x[, odds_cols, with = FALSE]
pca_x <- princomp(odds_features, cor = TRUE)
distmanh <- dist(odds_features, method = "manhattan")
mds_manh <- cmdscale(distmanh)
disteucl <- dist(odds_features, method = "euclidean")
mds_eucl <- cmdscale(disteucl)

summary(pca_x, loadings = TRUE)
par(mfrow = c(1, 1))
plot(pca_x, main = "BetVictor PCA") # same title as in the report section

# Colours: IsOver + 1 gives 2 = over, 1 = under;
# Outcome + 2 gives 3 = home, 2 = tie, 1 = away.
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$IsOver + 1, pch = ".",
     cex = 7, main = "BetVictor O/U PCA")
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$Outcome + 2, pch = ".",
     cex = 7, main = "BetVictor Outcomes PCA")
legend("topleft", legend = c("Home", "Tie", "Away"), col = c(3, 2, 1),
       pch = "-", cex = 0.9)
plot(mds_manh[, 1], mds_manh[, 2], main = "BetVictor O/U Manhattan MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("bottom", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(mds_eucl[, 1], mds_eucl[, 2], main = "BetVictor O/U Euclidean MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("bottomleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)

# William Hill: one row per match with the bookmaker's final odds as columns.
odds_bm <- odds_final[bookmaker == "William Hill"]
bm_wide <- dcast(odds_bm, matchId ~ oddtype, value.var = "final_odd")
merged <- merge(matches, bm_wide, by = "matchId")
merged[, c("type", "home", "away", "score", "HomeScore", "AwayScore",
           "TotalScore", "date") := NULL]

# NA counts per odd type; the trailing numbers are the counts noted by the author.
sum(is.na(merged$ha1)) # 784
sum(is.na(merged$`12`)) # 294
sum(is.na(merged$YES)) # 973
sum(is.na(merged$odd1)) # 0
sum(is.na(merged$over)) # 1546
sum(is.na(merged$over2.5)) # 1546

# Drop the odd types excluded for this bookmaker, then keep complete rows only.
merged[, c("over", "over2.5", "under", "under2.5") := NULL]
x <- na.omit(merged)

# The same odds columns feed PCA and both distance matrices. Excluding only
# matchId and IsOver would leave ScoreDifference and Outcome in the distances,
# so the match result itself would leak into the MDS embedding.
odds_cols <- c("ha1", "ha2", "12", "1X", "X2", "YES", "NO", "odd1", "odd2",
               "oddX")
odds_features <- x[, odds_cols, with = FALSE]
pca_x <- princomp(odds_features, cor = TRUE)
distmanh <- dist(odds_features, method = "manhattan")
mds_manh <- cmdscale(distmanh)
disteucl <- dist(odds_features, method = "euclidean")
mds_eucl <- cmdscale(disteucl)

summary(pca_x, loadings = TRUE)
par(mfrow = c(1, 1))
plot(pca_x, main = "William Hill PCA") # same title as in the report section

# Colours: IsOver + 1 gives 2 = over, 1 = under;
# Outcome + 2 gives 3 = home, 2 = tie, 1 = away.
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$IsOver + 1, pch = ".",
     cex = 7, main = "William Hill O/U PCA")
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(pca_x$scores[, 1], pca_x$scores[, 2], col = x$Outcome + 2, pch = ".",
     cex = 7, main = "William Hill Outcomes PCA")
legend("topleft", legend = c("Home", "Tie", "Away"), col = c(3, 2, 1),
       pch = "-", cex = 0.9)
plot(mds_manh[, 1], mds_manh[, 2], main = "William Hill O/U Manhattan MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("bottomleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)
plot(mds_eucl[, 1], mds_eucl[, 2], main = "William Hill O/U Euclidean MDS",
     xlab = "", ylab = "", col = x$IsOver + 1, pch = ".", cex = 7)
legend("topleft", legend = c("Over 2.5", "Under 2.5"), col = c(2, 1),
       pch = "-", cex = 0.9)

# TASK 3
# the full code for task 3
# library() stops right here if a package is missing; require() would only
# return FALSE and let the script fail later at the first package call.
library(jpeg)
library(imager)
# The same file is read twice: once as a native raster for display and once
# as a numeric array (indexed as img[, , channel] below).
img_path <- "/Users/silaaydin/Desktop/fall18/582/hw/hw2/image.JPEG"
img_raster <- readJPEG(img_path, native = TRUE)
img <- readJPEG(img_path, native = FALSE)
# display the image
par(mfrow = c(1, 1))
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Milk Carton")
rasterImage(img_raster, 0, 0, 250, 250)
# split the channels of the image into three matrices; each matrix is
# reversed column-wise and transposed before being passed to image()
# (presumably so that image() draws the picture upright)
r_img <- t(apply(img[, , 1], 2, rev))
g_img <- t(apply(img[, , 2], 2, rev))
b_img <- t(apply(img[, , 3], 2, rev))
# display the rgb channels, each with a 256-step ramp of its own colour
ramp <- (0:255) / 255
par(mfrow = c(1, 3))
image(r_img, col = rgb(ramp, 0, 0), main = "Red")
image(g_img, col = rgb(0, ramp, 0), main = "Green")
image(b_img, col = rgb(0, 0, ramp), main = "Blue")
# generate one uniform random variable in [0, 0.1] for every pixel value of
# the image; the size is taken from img itself rather than assuming
# 512*512*3, so the addition below cannot fail on a differently sized image
unifnoise <- runif(length(img), min = 0, max = 0.1)
noise <- array(unifnoise, dim(img)) # arrange noise as an array shaped like img
img_noise <- noise + img
# here the image is scaled because some of the pixel values were more than 1
# after adding the noise
img_noise <- imager::renorm(img_noise, min = 0, max = 1)
img_noise_raster <- as.raster(img_noise)
par(mfrow = c(1, 1))
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Noisy Milk Carton")
rasterImage(img_noise_raster, 0, 0, 250, 250)
# split the channels of the noisy image into three matrices, reoriented the
# same way as the channels of the original image
r_img_noise <- t(apply(img_noise[, , 1], 2, rev))
g_img_noise <- t(apply(img_noise[, , 2], 2, rev))
b_img_noise <- t(apply(img_noise[, , 3], 2, rev))
# display the rgb channels of the noisy image
noise_ramp <- (0:255) / 255
par(mfrow = c(1, 3))
image(r_img_noise, col = rgb(noise_ramp, 0, 0), main = "Red with Noise")
image(g_img_noise, col = rgb(0, noise_ramp, 0), main = "Green with Noise")
image(b_img_noise, col = rgb(0, 0, noise_ramp), main = "Blue with Noise")

# TASK 4
# library() fails immediately if OpenImageR is missing; require() would only
# return FALSE and postpone the error to the resizeImage() call.
library(OpenImageR)
# convert the noisy colour image to a single grayscale channel
gray_image <- imager::as.cimg(img_noise)
gray_image <- imager::grayscale(gray_image, method = "Luma")
# NOTE(review): the first line reverses the columns of the transposed matrix,
# the second reverses them again and transposes back, so the two flips appear
# to cancel and leave the orientation of as.matrix(); confirm before
# simplifying.
gray_image <- apply(t(as.matrix(gray_image)), 2, rev)
gray_image <- t(apply(gray_image, 2, rev))
# keep the numeric matrix for the patch extraction; gray_image itself becomes
# a raster that is only used for display
small_gray_image <- gray_image
gray_image <- as.raster(gray_image)
par(mfrow = c(1, 1))
# display the grayscale image
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Grayscale Image")
rasterImage(gray_image, 0, 0, 250, 250)

# resize the image to 202*202 so that each of the 200*200 top-left corners
# has a complete 3*3 patch
patch_size <- 3
n_side <- 200
resized_side <- n_side + patch_size - 1
small_gray_image <- resizeImage(small_gray_image, resized_side, resized_side,
                                method = "nearest")
# the resulting data matrix has 200*200 = 40000 rows (one per patch, ordered
# row by row over the patch's top-left pixel) and 9 columns for the pixels in
# every patch; it starts as NA so an unfilled row could never pass as data
B <- matrix(NA_real_, nrow = n_side^2, ncol = patch_size^2)
k <- 1
for (i in seq_len(n_side)) {
  for (j in seq_len(n_side)) {
    # named `patch` so the loop does not overwrite the `x` data table that
    # the bookmaker plots above read from
    patch <- small_gray_image[i:(i + patch_size - 1), j:(j + patch_size - 1)]
    B[k, ] <- as.vector(patch)
    k <- k + 1
  }
}
pca_B <- princomp(B, cor = TRUE)
summary(pca_B, loadings = TRUE)
# Turn the scores of one principal component into a 200*200 raster: the
# scores (one per patch) are laid out row by row and rescaled to [0, 1].
score_raster <- function(scores) {
  score_img <- t(matrix(scores, nrow = 200, ncol = 200))
  as.raster(imager::renorm(score_img, min = 0, max = 1))
}
# reconstruct the image from components 1, 2 and 3
t1 <- score_raster(pca_B$scores[, 1])
t2 <- score_raster(pca_B$scores[, 2])
t3 <- score_raster(pca_B$scores[, 3])
# plot the reconstructed images side by side
par(mfrow = c(1, 3))
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp1")
rasterImage(t1, 0, 0, 250, 250)
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp2")
rasterImage(t2, 0, 0, 250, 250)
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "Reconstruct w Comp3")
rasterImage(t3, 0, 0, 250, 250)
# store the first three eigenvectors (loadings) of the pca as 3*3 matrices,
# each rescaled to [0, 1] so it can be drawn as a grayscale image
eig1 <- matrix(pca_B$loadings[, 1], nrow = 3, ncol = 3)
eig1 <- imager::renorm(eig1, min = 0, max = 1)
eig2 <- matrix(pca_B$loadings[, 2], nrow = 3, ncol = 3)
eig2 <- imager::renorm(eig2, min = 0, max = 1)
eig3 <- matrix(pca_B$loadings[, 3], nrow = 3, ncol = 3)
eig3 <- imager::renorm(eig3, min = 0, max = 1)
par(mfrow = c(1, 3))
# plot the eigenvectors as images
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "PCA Component 1")
rasterImage(eig1, 0, 0, 250, 250)
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "PCA Component 2")
rasterImage(eig2, 0, 0, 250, 250)
plot(c(0, 250), c(0, 250), type = "n", xlab = "", ylab = "",
     main = "PCA Component 3")
# the third panel must show eig3; eig1 was drawn here a second time before
rasterImage(eig3, 0, 0, 250, 250)